library(tidyverse)
library(sf)
library(tigris)
library(leaflet)
Create bar or line charts showing monthly total kBTUs of residential and commercial electricity and gas consumption for the Bay Area (meaning the sum of all ZIP codes in the 9 Bay Area counties) from 2017 to the latest available month
ca_counties <- counties("06", cb = T, progress_bar = F)
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
ca_counties %>%
filter(NAME %in% bay_county_names)
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
bay_zipcodes <- bay_zips$ZCTA5CE10
#read data from 2017 to 1029
years <- c(2017,2018,2019)
quarters <- 1:4
pge_Gas <- NULL
pge_Elec <- NULL
for(year in years) {
for(quarter in quarters){
filename_Gas <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_GasUsageByZip.csv"
)
filename_Elec <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_ElectricUsageByZip.csv"
)
aux_Gas <- read_csv(filename_Gas)
pge_Gas <- rbind(pge_Gas, aux_Gas)
aux_Elec <- read_csv(filename_Elec)
pge_Elec <- rbind(pge_Elec, aux_Elec)
}
}
#read data for 2020
year <- 2020
quarters <- 1:2
for(quarter in quarters) {
filename_Gas <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_GasUsageByZip.csv"
)
filename_Elec <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_ElectricUsageByZip.csv"
)
aux_Gas <- read_csv(filename_Gas)
pge_Gas <- rbind(pge_Gas, aux_Gas)
aux_Elec <- read_csv(filename_Elec)
pge_Elec <- rbind(pge_Elec, aux_Elec)
}
#remove auxiliary variables
rm(aux_Gas)
rm(aux_Elec)
#abstract bay area electric consumption
pge_Elec$ZIPCODE <- as.character(pge_Elec$ZIPCODE)
pge_Elec_final <-
pge_Elec %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in%
bay_zipcodes
) %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALKWH =
sum(
TOTALKWH,
na.rm = T
)
) %>%
mutate(
TOTALKBTU =
TOTALKWH*3.41214
) %>%
select(
!c(TOTALKWH)
)
#abstract bay area gas consumption
#during debugging found that the ZIPCODE in pge_Gas datafram is integer-typed
#therefore, need to transform data type
pge_Gas$ZIPCODE <- as.character(pge_Gas$ZIPCODE)
pge_Gas_final <-
pge_Gas %>%
filter(
CUSTOMERCLASS %in%
c(
"Gas- Residential",
"Gas- Commercial"
)
) %>%
filter(
ZIPCODE %in%
bay_zipcodes
) %>%
select(
!c(COMBINED, AVERAGETHM, TOTALCUSTOMERS)
) %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
TOTALTHM =
sum(
TOTALTHM,
na.rm = T
)
) %>%
mutate(
TOTALKBTU =
TOTALTHM*99.9761
) %>%
select(
!c(TOTALTHM)
)
# concatenate gas and electric usage data
pge <- rbind(pge_Gas_final,pge_Elec_final)
pge$MONTH <- month.abb[pge$MONTH]
pge_final <-
pge %>%
mutate(
Time =
paste0(
MONTH,
YEAR
)
)
#create a vector of characters containning ordered dates (MonYear)
time_levels = c()
time_levels_less = c()
for(i in seq(1, 83, by=2)){
time_levels <- c(time_levels,pge_final$Time[i])
}
for(i in seq(1, 83, by=8)){
time_levels_less <- c(time_levels_less,pge_final$Time[i])
}
pge_chart <-
pge_final %>%
ggplot() +
geom_bar(
aes(
x = Time %>% factor(
levels=time_levels,
ordered = TRUE),
y = TOTALKBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Time",
y = "kBTU",
title = "PG&E Bay Area Monthly Gas & Electricity Usage, Jan 2017 to Jun 2020",
fill = "Gas or Electricity Type"
) +
scale_x_discrete(breaks = time_levels_less)
PG&E Bay Area Monthly total gas & electricity Usage (in kBTU) from Jan 2017 to Jun 2020
As we can see from the bar chart, the total monthly electricity consumptions for commerical customers during the shelter-in-place period (March 2020 to June 2020) are smaller than those of the same months but in previous years when covid-19 didn’t lead to the closure of many commercial sectors.
Create at least one map that highlights which neighborhoods experienced the greatest change in electricity consumption before and after the pandemic began, and comment on your results.
#"DoI" means Duration of Interest, which is March to Jun (shelter-in-place)
#abstract data for the duratio of interest in 2020
pge_Elec_Bay_2020_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
YEAR == 2020
) %>%
filter(
MONTH %in% c(3,4,5,6)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
)
pge_Elec_Bay_2019_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
YEAR == 2019
) %>%
filter(
MONTH %in% c(3,4,5,6)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
)
#calculate the average total consuption of 2017, 2018, 2019 for each zipcode in bay area
pge_Elec_Bay_Past_DoI <-
pge_Elec %>%
select(
!c(COMBINED, AVERAGEKWH, TOTALCUSTOMERS)
) %>%
filter(
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial"
)
) %>%
filter(
ZIPCODE %in% bay_zipcodes
) %>%
filter(
MONTH %in% c("3","4","5","6")
) %>%
filter(
YEAR %in% c(2017,2018,2019)
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
) %>%
mutate(
AVERAGE_3YEAR =
TOTALKWH/3.0
) %>%
select(
!c(TOTALKWH)
)
elec_change_19_20 <- merge(
pge_Elec_Bay_2020_DoI, pge_Elec_Bay_2019_DoI, by = c("ZIPCODE")
) %>%
mutate(
CHANGE = TOTALKWH.x-TOTALKWH.y
) %>%
select(
!c(TOTALKWH.x,TOTALKWH.y)
) %>%
left_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
elec_change_past_20 <- merge(
pge_Elec_Bay_2020_DoI, pge_Elec_Bay_Past_DoI, by = c("ZIPCODE")
) %>%
mutate(
CHANGE = TOTALKWH - AVERAGE_3YEAR
) %>%
select(
!c(TOTALKWH,AVERAGE_3YEAR)
) %>%
left_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
res_pal_1 <- colorNumeric(
palette = c("white", "red"),
domain =
elec_change_19_20$CHANGE
)
map_1 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_19_20,
fillColor = ~res_pal_1(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_19_20,
pal = res_pal_1,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, 2019 vs 2020"
)
map_1
res_pal_2 <- colorNumeric(
palette = c("white", "red"),
domain =
elec_change_past_20$CHANGE
)
map_2 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_past_20,
fillColor = ~res_pal_2(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_past_20,
pal = res_pal_2,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, the average of 2017, 2018 2019 vs 2020"
)
map_2
res_pal_3 <- colorNumeric(
palette = "Blues",
domain =
-elec_change_19_20$CHANGE
)
map_3 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_19_20,
fillColor = ~res_pal_3(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_19_20,
pal = res_pal_3,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, 2019 vs 2020"
)
map_3
res_pal_4 <- colorNumeric(
palette = "Blues",
domain =
-elec_change_past_20$CHANGE
)
map_4 <- leaflet() %>%
addTiles() %>%
addPolygons(
data = elec_change_past_20,
fillColor = ~res_pal_4(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" kWh total change in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = elec_change_past_20,
pal = res_pal_4,
values = ~CHANGE,
title = "Total Residential & Commercial Electricity Consumption Change in kWh, the average of 2017, 2018 2019 vs 2020"
)
map_4
Comment
The first map shows the change of total monthly electricity consumptions for commerical customers between covid-era (March 2020 to June 2020) and 2019. A postive change means an increase in monthly consumption since covid whereas an negative change means a decrease in monthly consumption.
From the map, a large number of census tracts experience a decrease in electricity consumption since shelter-in-place, compared to 2019. Although, it should be noted that this comparison is made between the average of three months of covid-era and the average of 12 months in 2019, and if we compare the three-month average of covid-era to any individual months of 2019, the results will certainly change.
The second map shows the change of total monthly electricity consumptions for commerical customers between covid-era (March 2020 to June 2020) and the three-year average monthly consumption of 2017, 2018, and 2019. Similarly to the first map, a postive change means an increase in monthly consumption since covid whereas an negative change means a decrease in monthly consumption.
In both first and second maps, two zones (confined by zipcodes) stands out in terms of increased electricity consumption: 94545 and 95014. Exactly why these two zones experience soared demands is not immediately apparent. More detailed household consumption data and analysis of the decomposition of electricity consumptions might help answer this question.
Map #3 and #4 are respectively the inverse of map #1 and map #2. In other words, a postive change in map #3 and #4 means a decrease in monthly consumption since covid whereas an negative change means an increase in monthly consumption. Now the two zones (94545 and 95014) stand out in grey.